In [46]:
    
%matplotlib inline
import logging
import sys
import os
import glob
import math
import ogr
import shapely.geometry, shapely.wkt
import pylab
import matplotlib.pyplot as plt
from utils.shapely_plot import draw
import shapely as sl
import fiona
import numpy as np
pylab.rcParams['figure.figsize'] = (17.0, 15.0)
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    
In [329]:
    
def convert(input_path):
    output_path = os.path.splitext(input_path)[0] + '.kml'
    filename = os.path.splitext(os.path.basename(input_path))[0]
    dst_ds = ogr.GetDriverByName('KML').CreateDataSource(output_path)
    dst_lyr = dst_ds.CreateLayer(filename)
    # create fields using OGR
    src_ds = ogr.Open(input_path)
    src_lyr = src_ds.GetLayerByIndex(0)
    f = src_lyr.GetFeature(0)
    [dst_lyr.CreateField(f.GetFieldDefnRef(i)) for i in range(f.GetFieldCount())]
    for feat in src_lyr:
        try:
            geom = shapely.wkt.loads(feat.GetGeometryRef().ExportToWkt())
        except Exception as e: 
            print('Error({0}), skipping geometry.'.format(e))
            continue
        #id = feat.GetField('HYBAS_ID')
        count = get_geom_coord_count(geom)
        # geom coord count
        # print('{0}: {1}'.format(id, count))
        if count > 10000:
            print ('simplifying ...')
            geom = geom.simplify(0.004)
        
        # uncomment in case of messed-up geometry
        #
        if not geom.is_valid:
            geom = geom.buffer(0.0)
        # geom = geom.simplify(0.004)
        # if id == 6030021870:
        #    print ('simplifying ...')
        #    geom = geom.simplify(0.004)
        f = ogr.Feature(dst_lyr.GetLayerDefn())
        # set field values
        for i in range(feat.GetFieldCount()):
            fd = feat.GetFieldDefnRef(i)
            f.SetField(fd.GetName(), feat.GetField(fd.GetName()))
        # set geometry    
        f.SetGeometry(ogr.CreateGeometryFromWkt(geom.to_wkt()))
        
        # create feature
        dst_lyr.CreateFeature(f)
        
        f.Destroy() 
        
    src_ds.Destroy()
    dst_ds.Destroy()
# files = glob.glob(r'D:\gis\HydroBASINS\without_lakes\hybas_sa_lev*_v1c.shp')
# files = glob.glob(r'D:\gis\HydroBASINS\without_lakes\simplified\hybas_au_lev08_v1c.shp')
# files = glob.glob(r'D:\gis\GRanD\GRanD_dams_v1_1.shp')
files = glob.glob(r'D:\gis\GRanD\GRanD_reservoirs_v1_1.shp')
# files = glob.glob(r'D:\gis\HydroBASINS\rivers\*_riv_15s.shp')
# files = [r'D:\gis\OpenStreetMaps\Asia\Australia\rivers_lines.shp',
#          r'D:\gis\OpenStreetMaps\Asia\Australia\rivers_multipolygons.shp']
# files = [r'D:\gis\OpenStreetMaps\simplified-land-polygons-complete-3857\simplified_land_polygons.shp']
    
# convert(r'D:\src\GitHub\data-utils\notebooks\shapefile.shp')
# convert(r'D:\gis\HydroBASINS\without_lakes\hybas_au_lev07_v1c.shp')
for f in files:
    convert(f)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [ ]:
    
import fiona
c = fiona.collection(files[0], "r")
print(c.schema)
type_map = { 'float' : ogr.OFTReal, 'int' : ogr.OFTInteger }
fields = c.schema['properties']
for p in fields:
    print p + ' ' + fields[p]
    
In [3]:
    
# Select sub-basins that fall within a larger basin and then group them by using a regular grid
# Amazon
# main_basin_shp = 'D:\gis\HydroBASINS\without_lakes\hybas_sa_lev02_v1c.shp'
# main_basin_id = 6020006540
# sub_basin_shp = 'D:\gis\HydroBASINS\without_lakes\hybas_sa_lev06_v1c.shp'
# Murray & Darling
main_basin_shp = 'D:\gis\HydroBASINS\without_lakes\hybas_au_lev03_v1c.shp'
main_basin_id = 5030073410
sub_basin_shp = 'D:\gis\HydroBASINS\without_lakes\hybas_au_lev07_v1c.shp'
pylab.rcParams['figure.figsize'] = (17.0, 20.0)
fig = plt.figure()
axes = plt.axes()
axes.set_aspect('equal', 'datalim')
main_basin_geom = None
with fiona.collection(main_basin_shp, "r") as input:
    for f in input:
        geom = sl.geometry.shape(f['geometry'])
        id = f['properties']['HYBAS_ID']
        if id == main_basin_id:
            main_basin_geom = geom
        draw(geom)
bounds = main_basin_geom.bounds
axes.set_xlim(bounds[0], bounds[2])
axes.set_ylim(bounds[1], bounds[3])
                                
with fiona.collection(sub_basin_shp, "r") as input:
    for f in input:
        geom = sl.geometry.shape(f['geometry'])
        if main_basin_geom.contains(geom):
            draw(geom, fill='#aaaaff', alpha=0.5)
        else:
            pass
            # draw(geom, fill='#ffffff', lw=0.1)
plt.show()
    
    
    
In [7]:
    
def get_basin(id, path):
    with fiona.collection(path, "r") as input:
        for f in input:
            id = f['properties']['HYBAS_ID']
            if id == main_basin_id:
                return f
    return None
def get_child_basins(parent_pfaf_id, child_basins_path):
    with fiona.collection(child_basins_path, "r") as input:
        for feature in input:
            pfaf_id = feature['properties']['PFAF_ID'] 
            if str(pfaf_id).startswith(str(parent_pfaf_id)): 
                yield feature
# Amazon
# files = glob.glob(r'D:\gis\HydroBASINS\without_lakes\hybas_sa_lev*_v1c.shp')[1:]
# main_basin_id = 6020006540
# start = 0
files = glob.glob(r'D:\gis\HydroBASINS\without_lakes\hybas_au_lev*_v1c.shp')[1:]
main_basin_id = 5030073410
start = 2
main_basin = get_basin(main_basin_id, files[start])
pfaf_id = str(main_basin['properties']['PFAF_ID'])
main_basin_geom = sl.geometry.shape(main_basin['geometry'])
print('Main basin: ' + files[0])
print('Area: ' + str(main_basin_geom.area))
total_count = 0
def get_children(main_basin_id, child_basins_path):
    print('Child basins: ' + child_basins_path)
    child_basins = list(get_child_basins(main_basin_id, child_basins_path))
    child_basins_ids = [str(f['properties']['PFAF_ID']) for f in child_basins]
    child_basins_geoms = [sl.geometry.shape(f['geometry']) for f in child_basins]
    count = len(child_basins)
    global total_count
    total_count += count
    print(count)
    
    return (child_basins, child_basins_ids, child_basins_geoms)
basins3, ids3, geoms3 = get_children(pfaf_id, files[start + 1])
basins4, ids4, geoms4 = get_children(pfaf_id, files[start + 2])
basins5, ids5, geoms5 = get_children(pfaf_id, files[start + 3])
basins6, ids6, geoms6 = get_children(pfaf_id, files[start + 4])
basins7, ids7, geoms7 = get_children(pfaf_id, files[start + 5])
basins8, ids8, geoms8 = get_children(pfaf_id, files[start + 6])
basins_per_level = [basins3, basins4, basins5, basins6, basins7, basins8]
print('Total basins: ' + str(total_count))
    
    
Put all catchments into a graph
In [8]:
    
import networkx as nx
n = nx.DiGraph()
ids = [pfaf_id]+ids3+ids4+ids5+ids6+ids7
areas = [g.area for g in [main_basin_geom]+geoms3+geoms4+geoms5+geoms6+geoms7]
basins = [b for b in [main_basin]+basins3+basins4+basins5+basins6+basins7]
n.add_nodes_from([(i,{'area':a, 'feature':f}) for (i,a,f) in zip(ids, areas, basins)])
# n.add_nodes_from([(i,{'area':a}) for (i,a,f) in zip(ids, areas, basins)])
n.add_edges_from([(pfaf_id, i) for i in ids3])
n.add_edges_from([(i[:-1], i) for i in ids4])
n.add_edges_from([(i[:-1], i) for i in ids5])
n.add_edges_from([(i[:-1], i) for i in ids6])
n.add_edges_from([(i[:-1], i) for i in ids7])
# max_level = 6
# n = n.subgraph([kv[0] for kv in list(n.degree().iteritems()) if len(kv[0]) < max_level + 2])
    
In [9]:
    
nodes=dict(n.nodes(data=True))
shape=sl.geometry.shape(nodes['5643001']['feature']['geometry'])
shape
    
    Out[9]:
In [10]:
    
def get_wh(geom):
    bounds = geom.bounds
    w = bounds[2]-bounds[0]
    h = bounds[3]-bounds[1]
    
    return (w, h) 
    
(w, h) = get_wh(shape)
print('w', w)
print('h', h)
    
    
In [11]:
    
def geometry_larger_than(geom, w, h):
    bounds = geom.bounds
    gw = bounds[2]-bounds[0]
    gh = bounds[3]-bounds[1]
    return (gw > w) or (gh > h)
def geometry_smaller_than(geom, w, h):
    bounds = geom.bounds
    gw = bounds[2]-bounds[0]
    gh = bounds[3]-bounds[1]
    return (gw < w) and (gh < h)
def get_leaves_smaller_than(node, w, h):
    for child_node in n.successors(node):
        geom = sl.geometry.shape(nodes[child_node]['feature']['geometry'])
        
        last = len(n.successors(child_node)) == 0
    
        if(geometry_smaller_than(geom, w, h) or last):
            yield child_node
        else:
            for child_child_node in get_leaves_smaller_than(child_node, w, h):
                yield child_child_node
    
In [65]:
    
max_w = 2.0
max_h = 2.0
    
In [66]:
    
leaves = list(get_leaves_smaller_than('564', max_w, max_h))
print(len(leaves))
s = n.subgraph(leaves)
    
    
In [53]:
    
basins_wh = []
basins_wh_min = []
basins_wh_max = []
basins_count = []
for basins in basins_per_level:
    whs = [get_wh(sl.geometry.shape(basin['geometry'])) for basin in basins]
    max_wh = np.max([max(whi) for whi in whs])
    min_wh = np.min([min(whi) for whi in whs])
    count = len(basins)
    wh = 0.5*(max_wh+min_wh)
    basins_wh_min.append(min_wh)
    basins_wh_max.append(max_wh)
    basins_wh.append(wh)
    basins_count.append(count)
    print(min_wh, wh, max_wh, count)
    
    
In [59]:
    
# show number of catchments per level + maximum Width / Height
min_wh_values = []
max_wh_values = []
wh_values = []
average_wh_values = []
catchment_count = []
for i in np.arange(0.2, 9.0, 0.2):
    selection = list(get_leaves_smaller_than('564', i, i))
    
    wh = [get_wh(sl.geometry.shape(nodes[basin]['feature']['geometry'])) for basin in selection]
    max_wh = np.max([max(whi) for whi in wh])
    min_wh = np.min([min(whi) for whi in wh])
    average_wh_values.append(0.5*(min_wh+max_wh))
    count = len(basins)
    count = len(selection)
    print(min_wh, i, max_wh, count)
    catchment_count.append(count)
    max_wh_values.append(max_wh)
    min_wh_values.append(min_wh)
    
    wh_values.append(i)
    
    
In [60]:
    
plt.plot(catchment_count, wh_values, 'ob-', linewidth=2.0)
plt.plot(catchment_count, average_wh_values, 'b-')
plt.plot(catchment_count, min_wh_values, 'b--')
plt.plot(catchment_count, max_wh_values, 'b--')
# plt.errorbar(wh_values, catchment_count,xerr=[min_error_wh_values, max_error_wh_values], fmt='.')
# plt.errorbar(wh_values, catchment_count, xerr=error_wh_values)
plt.plot(basins_count, basins_wh, 'or-', linewidth=2.0)
plt.plot(basins_count, basins_wh_min, 'r--')
plt.plot(basins_count, basins_wh_max, 'r--')
# plt.errorbar(basins_wh, basins_count, xerr=[basins_wh_error_min,basins_wh_error_max])
    
    Out[60]:
    
In [198]:
    
large_nodes = list((node for (node, data) in n.nodes(data=True) 
                    if geometry_larger_than(sl.geometry.shape(data['feature']['geometry']), max_w, max_h)))
s = n.subgraph(large_nodes)
print(len(large_nodes))
leaves=[node for node,data in s.out_degree().items() if data==0]
print(len(list(leaves)))
s = n.subgraph(leaves)
    
    
    Out[198]:
Try to open it in http://visjs.org/examples/network/15_dot_language_playground.html
In [14]:
    
nx.write_dot(n, r"..\output\catchment_pfafids.dot")
print(len(n.nodes()))
    
    
In [69]:
    
pos = nx.spring_layout(nx.Graph(n), iterations=500)
    
In [70]:
    
import math
node_areas=[data['area'] for node,data in n.nodes(data=True)]
node_sizes=[math.log(a)*100 for a in node_areas]
nx.draw_networkx_nodes(n, pos, node_color=node_areas, node_size=node_sizes, cmap=plt.cm.Blues, alpha=0.85)
_ = nx.draw_networkx_edges(n, pos, alpha=0.2)
# _ = nx.draw_networkx_labels(n, pos, font_size=15)
# nx.draw_networkx_nodes(s, pos, node_size=20, cmap=plt.cm.Reds, alpha=0.85)
    
    
    
In [335]:
    
leaf_basins = list(s.nodes(data=True))
fig = plt.figure()
axes = plt.axes()
axes.set_aspect('equal', 'datalim')
for f in leaf_basins:
    geom = sl.geometry.shape(f[1]['feature']['geometry'])
    draw(geom, alpha=0.5)
plt.show()
    
    
In [67]:
    
# write as a selection shp file
with fiona.open(files[2]) as source:
    source_driver = source.driver
    source_crs = source.crs
    source_schema = source.schema
# strange bug? values are not written unless we add .1
source_schema['properties']['HYBAS_ID'] = 'float:11.1'
source_schema['properties']['NEXT_DOWN'] = 'float:11.1'
source_schema['properties']['NEXT_SINK'] = 'float:11.1'
source_schema['properties']['MAIN_BAS'] = 'float:11.1'
print(source_schema)
    
with fiona.open('../output/small_basin_selection.shp', 'w', driver=source_driver, crs=source_crs, schema=source_schema) as c:
    for node in s.nodes(data=True):
        feature = node[1]['feature']
        c.write(feature)
    
    
In [21]:
    
print(n.predecessors('5642'))
print(n.successors('5642'))
s = n.subgraph(n.successors('5642')+['5642','564','56424','5648','56480'])
node_areas=[data['area'] for node,data in s.nodes(data=True)]
node_sizes=[a*5 for a in node_areas]
pos = nx.spring_layout(nx.Graph(s), iterations=500, scale=10.0)
nx.draw_networkx_nodes(s, pos, node_color=node_areas, node_size=node_sizes, cmap=plt.cm.Blues, alpha=0.85)
_ = nx.draw_networkx_edges(s, pos, alpha=0.2)
_ = nx.draw_networkx_labels(s, pos, font_size=15)
    
    
    
Select basins where Area + Width/Height ratio are just less than a given parameter
In [20]:
    
def get_catchments_using_area(graph, root, max_area):
    children = graph.successors(root)
    for child in children:
        graph[child]
    
print(n.predecessors('5642'))
print(n.successors('5642'))